knitr::opts_chunk$set(message = FALSE, warning = FALSE, results='hide')
library(Seurat)
library(tidyverse)
library(edgeR)
library(scater)
library(ruv)
library(cluster) #silhouettes
library(factoextra) #silhouettes
source("/mnt/auxf.R")
path <- ('/domino/datasets/local/RUV')
Preprocessing
Pseudobulk matrices
PBC <- readRDS("/domino/datasets/local/RUV/pblupusds2.rds")
PBC_split <- split(PBC$samples$sample_cell,PBC$samples$ind_cov_batch_cov)
PBC_S <- DGEList(counts=sapply(PBC_split, function(x) rowSums(PBC$counts[,x])),
samples=unique(PBC$samples[,!colnames(PBC$samples) %in% c("cg_cov" ,"sample_cell","lib.size","norm.factors" )]))
Negative control genes and high variable genes
hkGagnon <- read.csv(paste0(path,"/Genes_Gagnon.txt"), sep="")
hkGagnon <- unlist(hkGagnon)
filter.vst <- FindVariableFeatures(PBC,selection.method = 'vst')
high_varg <- which(filter.vst[hkGagnon,]$vst.variance.standardized>1.8)
gene.D <- data.frame(gene=rownames(PBC))
rownames(gene.D) <- gene.D$gene
hk.ind <- rownames(PBC) %in% hkGagnon[-high_varg]
Assigning mock treatment
subs <- unique(PBC$samples$ind_cov)
set.seed(1)
treatment <- sample(c("A","B"),length(subs), replace=T)
PBC$samples <- PBC$samples %>% left_join(bind_cols(ind_cov = subs, fk.tr = treatment), by='ind_cov')
PBC_S$samples <- PBC_S$samples %>% left_join(bind_cols(ind_cov = subs, fk.tr = treatment), by='ind_cov')
Dataset

Unwanted variation in the data
ys.ct <- lapply(celltypes,split.ct,PBC=PBC) # split dataset
logys.ct <- lapply(ys.ct,function(y) voom(y)$E) #cpm transformation
hvgs <- lapply(logys.ct, highvarg) # high variable genes for PCA
B

NK

T8

cDC

cM

ncM

pDC

Removing the Processing Cohort effect
B

NK

T8

cDC

cM

ncM

pDC

T1
k=5
y.all <- PBC[1:nrow(PBC),,keep.lib.sizes=FALSE]
pmin.all <- find_p(y.all)
y.all <- calcNormFactors(y.all, method="upperquartile", p=pmin.all)
nf.all <- y.all$samples$norm.factors
logy.all <- voom(y.all)$E
y.all$samples$ind_covT1 <- factor(paste(y.all$samples$ind_cov, y.all$samples$cg_cov,sep="_"))
M <- replicate.matrix(y.all$samples$ind_covT1)
rownames(M) <- y.all$samples$ind_covT1
group <- factor(paste(y.all$samples$fk.tr, y.all$samples$cg_cov,sep="."))
ruv2T1 <- RUV2mod(Y = t(logy.all), X = group, ctl = hk.ind, k=k, include.intercept = F, Z = y.all$samples$pop_cov)
ruv3T1 <- RUVIIIW(Y = t(logy.all), M = M, ctl = hk.ind, k = k, return.info = T)
ruv4T1 <- RUV4mod(Y = t(logy.all), X = group, ctl = hk.ind, k=k, Z = y.all$samples$pop_cov)
RUV2
B

NK

T8

cDC

cM

ncM

pDC

RUVIII
B

NK

T8

cDC

cM

ncM

pDC

RUV4
B

NK

T8

cDC

cM

ncM

pDC

T2
RUV2
B

NK

T8

cDC

cM

ncM

pDC

RUVIII
B

NK

T8

cDC

cM

ncM

pDC

RUV4
B

NK

T8

cDC

cM

ncM

pDC

T3
hk.ind <- rownames(PBC_S) %in% hkGagnon[-high_varg]
y.bc <- PBC_S
pmin.bc <- find_p(bc)
y.bc <- calcNormFactors(y.bc, method="upperquartile", p=pmin.bc)
nf.bc <- y.bc$samples$norm.factors
logy.bc <- voom(y.bc)$E
M.bc <- replicate.matrix(y.bc$samples$ind_cov)
rownames(M.bc) <- y.bc$samples$ind_cov
ruv2T3 <- RUV2mod(Y = t(logy.bc), X = y.bc$samples$fk.tr, Z=y.bc$samples$pop.cov , ctl = hk.ind, k=k)
ruv3T3 <- RUVIIIW(Y = t(logy.bc), M = M.bc, ctl = hk.ind, k = k, return.info = T)
ruv4T3 <- RUV4mod(Y = t(logy.bc), X = y.bc$samples$fk.tr, Z=y.bc$samples$pop.cov , ctl = hk.ind, k=k)
RUV2
B

NK

T8

cDC

cM

ncM

pDC

RUVIII
B

NK

T8

cDC

cM

ncM

pDC

RUV4
B

NK

T8

cDC

cM

ncM

pDC
